In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mplt
import seaborn as sns
import arviz as az
import datetime
In [2]:
import fit_arima
In [3]:
import open_data
df = open_data.open()
In [4]:
ritorno = fit_arima.compute(2, 1, catene=4, samples_per_chain=2500, burnin=1000)
20:24:12 - cmdstanpy - INFO - CmdStan start processing
chain 1 |          | 00:00 Status
chain 2 |          | 00:00 Status
chain 3 |          | 00:00 Status
chain 4 |          | 00:00 Status
                                                                                                                                                                                                                                                                                                                                
20:49:49 - cmdstanpy - INFO - CmdStan done processing.
20:49:49 - cmdstanpy - WARNING - Non-fatal error during sampling:
Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 157, column 4 to column 34)
	Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 157, column 4 to column 34)
	Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 157, column 4 to column 34)
	Exception: code_model_namespace::log_prob: phi[1][1] is -nan, but must be greater than or equal to -1.000000 (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 83, column 2 to column 47)
	Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 157, column 4 to column 34)
Exception: normal_lpdf: Scale parameter is -1.77241, but must be positive! (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 143, column 4 to column 61)
	Exception: normal_lpdf: Scale parameter is -1.33685, but must be positive! (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 143, column 4 to column 61)
	Exception: normal_lpdf: Scale parameter is -1.88462, but must be positive! (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 143, column 4 to column 61)
	Exception: code_model_namespace::log_prob: phi[2][49] is 1, but must be less than or equal to 1.000000 (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 83, column 2 to column 47)
	Exception: code_model_namespace::log_prob: phi[1][17] is 1, but must be less than or equal to 1.000000 (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 83, column 2 to column 47)
	Exception: normal_lpdf: Scale parameter is -1.63119, but must be positive! (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 143, column 4 to column 61)
	Exception: code_model_namespace::log_prob: phi[1][1] is -nan, but must be greater than or equal to -1.000000 (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 83, column 2 to column 47)
	Exception: code_model_namespace::log_prob: phi[2][24] is -nan, but must be greater than or equal to -1.000000 (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 83, column 2 to column 47)
	Exception: normal_lpdf: Scale parameter is -0.00166804, but must be positive! (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 143, column 4 to column 61)
	Exception: normal_lpdf: Scale parameter is -0.014183, but must be positive! (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 143, column 4 to column 61)
Exception: normal_lpdf: Scale parameter is -1.93972, but must be positive! (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 143, column 4 to column 61)
	Exception: normal_lpdf: Scale parameter is -0.38646, but must be positive! (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 143, column 4 to column 61)
	Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 157, column 4 to column 34)
	Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 157, column 4 to column 34)
	Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 157, column 4 to column 34)
	Exception: code_model_namespace::log_prob: phi[1][1] is -nan, but must be greater than or equal to -1.000000 (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 83, column 2 to column 47)
	Exception: code_model_namespace::log_prob: phi[1][5] is 1, but must be less than or equal to 1.000000 (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 83, column 2 to column 47)
Exception: code_model_namespace::log_prob: phi[2][37] is 1, but must be less than or equal to 1.000000 (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 83, column 2 to column 47)
	Exception: code_model_namespace::log_prob: phi[2][11] is 1, but must be less than or equal to 1.000000 (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 83, column 2 to column 47)
	Exception: code_model_namespace::log_prob: phi[1][8] is -nan, but must be greater than or equal to -1.000000 (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 83, column 2 to column 47)
	Exception: code_model_namespace::log_prob: phi[1][8] is 1, but must be less than or equal to 1.000000 (in '/home/br1/PythonProjects/PythonStats/git_bayesian_copie/ARIMA/NoCovariates/code.stan', line 83, column 2 to column 47)
Consider re-running with show_console=True if the above output is unclear!

/home/br1/PythonProjects/PythonStats/ARIMA_last_computation/NoCovariates/fit_arima.py:37: FutureWarning: Indexing with a float is deprecated, and will raise an IndexError in pandas 2.0. You can manually convert to an integer key instead.
  y_missing_list.append({'Stazione':df.columns[v[i]],'Data':df.index[u[i]],'Samples':inference_data.posterior.w.values[:,:,i].reshape(catene*samples_per_chain)})

Tempo di computazione: 25:37

In [5]:
res = az.loo(ritorno['inference_data'], var_name="log_lik", pointwise=True)
res
/home/br1/PythonProjects/PythonStats/.venv/lib/python3.10/site-packages/arviz/stats/stats.py:803: UserWarning: Estimated shape parameter of Pareto distribution is greater than 0.7 for one or more samples. You should consider using a more robust model, this is because importance sampling is less likely to work well if the marginal posterior and LOO posterior are very different. This is more likely to happen with a non-robust model and highly influential observations.
  warnings.warn(
Out[5]:
Computed from 10000 posterior samples and 17191 observations log-likelihood matrix.

         Estimate       SE
elpd_loo  7252.62   154.96
p_loo      296.80        -

There has been a warning during the calculation. Please check the results.
------

Pareto k diagnostic values:
                         Count   Pct.
(-Inf, 0.5]   (good)     17099   99.5%
 (0.5, 0.7]   (ok)          80    0.5%
   (0.7, 1]   (bad)         12    0.1%
   (1, Inf)   (very bad)     0    0.0%
In [6]:
aux_plt = az.plot_khat(res, show_bins=True, figsize=(7,5))
In [7]:
res = az.waic(ritorno['inference_data'], var_name="log_lik")
res
/home/br1/PythonProjects/PythonStats/.venv/lib/python3.10/site-packages/arviz/stats/stats.py:1645: UserWarning: For one or more samples the posterior variance of the log predictive densities exceeds 0.4. This could be indication of WAIC starting to fail. 
See http://arxiv.org/abs/1507.04544 for details
  warnings.warn(
Out[7]:
Computed from 10000 posterior samples and 17191 observations log-likelihood matrix.

          Estimate       SE
elpd_waic  7264.00   154.85
p_waic      285.43        -

There has been a warning during the calculation. Please check the results.
In [8]:
plt_aux = az.plot_trace(ritorno['inference_data'], var_names=['phi','theta','hyper_gamma_phi_m','hyper_gamma_phi_s'], divergences=True, figsize=(15,15))
In [9]:
plt_aux = az.plot_trace(ritorno['inference_data'], var_names=['mh_sigma','sh_sigma','sigma'], divergences=True, figsize=(15,10))
In [10]:
plt_aux = az.plot_trace(ritorno['inference_data'], var_names=['hyper_y_start_m','hyper_y_start_s','c'], divergences=True, figsize=(15,10))
In [11]:
az.summary(ritorno['inference_data'],var_names=['y_start'])
Out[11]:
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
y_start[0, 0] 1.421 0.089 1.254 1.588 0.002 0.001 2237.0 3725.0 1.0
y_start[0, 1] 1.420 0.089 1.247 1.586 0.002 0.001 2358.0 3599.0 1.0
y_start[0, 2] 1.424 0.093 1.249 1.597 0.002 0.001 2835.0 4100.0 1.0
y_start[0, 3] 1.424 0.090 1.248 1.588 0.002 0.001 3154.0 4000.0 1.0
y_start[0, 4] 1.427 0.091 1.259 1.600 0.002 0.001 2664.0 3541.0 1.0
... ... ... ... ... ... ... ... ... ...
y_start[2, 44] 1.445 0.054 1.346 1.549 0.001 0.001 1481.0 2555.0 1.0
y_start[2, 45] 1.442 0.055 1.338 1.545 0.001 0.001 1620.0 2722.0 1.0
y_start[2, 46] 1.438 0.054 1.338 1.538 0.001 0.001 1732.0 2540.0 1.0
y_start[2, 47] 1.452 0.055 1.347 1.552 0.001 0.001 1487.0 2769.0 1.0
y_start[2, 48] 1.459 0.056 1.355 1.562 0.001 0.001 1563.0 2712.0 1.0

147 rows × 9 columns

In [12]:
az.summary(ritorno['inference_data'], var_names=['hyper_y_start_m', 'hyper_y_start_s'])
Out[12]:
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
hyper_y_start_m 1.432 0.037 1.367 1.504 0.002 0.001 431.0 772.0 1.0
hyper_y_start_s 0.082 0.012 0.061 0.105 0.001 0.000 580.0 1215.0 1.0
In [13]:
aux_shape = ritorno['inference_data'].posterior.gamma_th.values.shape
sns.kdeplot(ritorno['inference_data'].posterior.gamma_th.values.reshape((aux_shape[0]*aux_shape[1])), legend=False)
Out[13]:
<AxesSubplot: ylabel='Density'>
In [14]:
aux_shape = ritorno['inference_data'].posterior.phi.values.shape
sns.kdeplot(ritorno['inference_data'].posterior.phi.values.reshape((aux_shape[0]*aux_shape[1],aux_shape[2],aux_shape[3]))[:,0,:], legend=False)
Out[14]:
<AxesSubplot: ylabel='Density'>
In [15]:
aux_shape = ritorno['inference_data'].posterior.phi.values.shape
sns.kdeplot(ritorno['inference_data'].posterior.phi.values.reshape((aux_shape[0]*aux_shape[1],aux_shape[2],aux_shape[3]))[:,1,:], legend=False)
Out[15]:
<AxesSubplot: ylabel='Density'>
In [16]:
aux_shape = ritorno['inference_data'].posterior.c.values.shape
sns.kdeplot(ritorno['inference_data'].posterior.c.values.reshape((aux_shape[0]*aux_shape[1],aux_shape[2])), legend=False)
Out[16]:
<AxesSubplot: ylabel='Density'>
In [17]:
aux_shape = ritorno['inference_data'].posterior.annual_mean.values.shape
sns.kdeplot(ritorno['inference_data'].posterior.annual_mean.values.reshape((aux_shape[0]*aux_shape[1],aux_shape[2])), legend=False)
Out[17]:
<AxesSubplot: ylabel='Density'>
In [18]:
aux_shape = ritorno['inference_data'].posterior.annual_median.values.shape
sns.kdeplot(ritorno['inference_data'].posterior.annual_median.values.reshape((aux_shape[0]*aux_shape[1],aux_shape[2])), legend=False)
Out[18]:
<AxesSubplot: ylabel='Density'>
In [19]:
aux_shape = ritorno['inference_data'].posterior.annual_max.values.shape
sns.kdeplot(ritorno['inference_data'].posterior.annual_max.values.reshape((aux_shape[0]*aux_shape[1],aux_shape[2])), legend=False)
Out[19]:
<AxesSubplot: ylabel='Density'>
In [20]:
valori_31_dic = []
for dizionario in ritorno['missing']:
    if dizionario['Data'] == '2018-12-31':
        valori_31_dic.append(dizionario['Samples'])
        
res_plot = sns.kdeplot(valori_31_dic, legend=False)
In [21]:
aux_shape = ritorno['inference_data'].posterior.annual_days_over_threshold.values.shape
sns.kdeplot(ritorno['inference_data'].posterior.annual_days_over_threshold.values.reshape((aux_shape[0]*aux_shape[1],aux_shape[2])), legend=False)
Out[21]:
<AxesSubplot: ylabel='Density'>
In [22]:
pd.set_option('display.max_columns', 500)
aux_results = pd.DataFrame(ritorno['inference_data'].posterior.annual_days_over_threshold.values.reshape((aux_shape[0]*aux_shape[1],aux_shape[2])), columns=df.columns).describe()
aux_results.sort_values('mean', axis=1, ascending=True)
Out[22]:
CASTELLUCCIO SAVIGNANO DI RIGO CORTE BRUGNATELLA FEBBIO SAN LEO VERUCCHIO BADIA GIARDINI MARGHERITA MARECCHIA SAN PIETRO CAPOFIUME PARCO BERTOZZI LUGAGNANO DE AMICIS DELTA CERVIA SAN LAZZARO PARCO RESISTENZA FRANCHINI-ANGELONI GHERARDI VIA CHIARINI GAVELLO SAVIGNANO BESENZONE VILLA FULVIA ROMA PORTA SAN FELICE CASTELLARANO CAORLE PARCO MONTECUCCO ZALAMELLA CENTO PARCO EDILCARANI BOGOLESE SARAGAT PARADIGNA S. LAZZARO MALCANTONE REMESINA S. ROCCO ISONZO CENO PARCO FERRARI CITTADELLA GIORDANI-FARNESE SAN FRANCESCO FLAMINIA MONTEBELLO GIARDINI GERBIDO TIMAVO
count 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.00000 10000.000000 10000.000000 10000.000000 10000.000000 10000.00000 10000.000000 10000.000000 10000.000000 10000.00000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.00000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000
mean 0.106700 0.116400 0.119700 0.289000 1.559900 4.887700 7.687600 13.404600 15.672500 15.793200 17.390400 17.684900 17.879000 17.911000 18.899100 19.614500 20.996000 21.328200 21.453700 23.363100 23.59780 24.217700 28.324600 28.342200 28.789300 29.24280 29.529800 30.391600 30.775400 31.16390 31.796000 32.263300 34.652500 35.675200 36.286400 36.308700 37.110300 38.380500 42.287200 42.403800 42.96810 46.064900 46.171700 50.864500 51.161100 54.630100 60.347000 63.473900 70.158600
std 0.391574 0.382185 0.408642 0.673739 1.934473 4.081612 5.397863 8.422006 8.934071 9.851515 10.357901 9.836335 9.548742 9.619789 10.460395 10.308885 11.223385 11.700238 11.404677 11.355472 11.55254 12.700694 13.488366 13.409634 13.087469 11.81637 14.628524 13.945031 14.034288 14.23678 12.686799 13.744761 14.303105 14.884061 15.056349 15.017036 14.948314 15.692971 16.041161 17.171483 16.33913 16.927479 17.198482 17.002725 18.762461 18.677081 19.584973 22.603575 21.960589
min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 2.00000 0.000000 0.000000 0.000000 2.00000 3.000000 2.000000 3.000000 3.000000 3.000000 3.000000 2.000000 2.000000 2.000000 4.000000 5.00000 6.000000 3.000000 6.000000 5.000000 8.000000 5.000000 5.000000 8.000000
25% 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 4.000000 7.000000 9.000000 9.000000 10.000000 11.000000 11.000000 11.000000 11.000000 12.000000 13.000000 13.000000 13.000000 15.000000 15.00000 15.000000 19.000000 19.000000 19.000000 21.00000 19.000000 20.000000 21.000000 21.00000 23.000000 22.000000 24.000000 25.000000 26.000000 26.000000 26.000000 27.000000 31.000000 30.000000 31.00000 34.000000 34.000000 39.000000 38.000000 41.000000 46.000000 48.000000 55.000000
50% 0.000000 0.000000 0.000000 0.000000 1.000000 4.000000 7.000000 12.000000 14.000000 14.000000 16.000000 16.000000 16.000000 16.000000 17.000000 18.000000 19.000000 19.000000 19.000000 22.000000 22.00000 22.000000 26.000000 26.000000 27.000000 28.00000 27.000000 28.000000 29.000000 29.00000 30.000000 30.000000 33.000000 34.000000 34.000000 34.000000 35.000000 36.000000 41.000000 40.000000 41.00000 44.000000 44.000000 49.000000 49.000000 53.000000 59.000000 61.000000 68.000000
75% 0.000000 0.000000 0.000000 0.000000 2.000000 7.000000 10.000000 18.000000 20.000000 21.000000 23.000000 23.000000 23.000000 23.000000 24.000000 25.000000 27.000000 28.000000 27.000000 30.000000 30.00000 31.000000 36.000000 36.000000 36.000000 36.00000 38.000000 38.000000 39.000000 39.00000 39.000000 40.000000 43.000000 44.000000 45.000000 45.000000 46.000000 48.000000 52.000000 52.000000 52.00000 56.000000 57.000000 61.000000 62.000000 66.000000 72.000000 77.000000 84.000000
max 5.000000 4.000000 7.000000 7.000000 21.000000 46.000000 46.000000 71.000000 82.000000 104.000000 131.000000 93.000000 86.000000 84.000000 92.000000 108.000000 105.000000 104.000000 104.000000 99.000000 103.00000 108.000000 112.000000 154.000000 110.000000 113.00000 146.000000 122.000000 135.000000 127.00000 98.000000 142.000000 106.000000 136.000000 127.000000 114.000000 129.000000 127.000000 144.000000 160.000000 165.00000 132.000000 146.000000 136.000000 177.000000 145.000000 169.000000 179.000000 180.000000
In [23]:
pd.set_option('display.max_columns', 500)
aux_results = pd.DataFrame(ritorno['inference_data'].posterior.is_over_daily_limit.values.reshape((aux_shape[0]*aux_shape[1],aux_shape[2])), columns=df.columns).describe()
aux_results.sort_values('mean', axis=1, ascending=True)
Out[23]:
CASTELLUCCIO FEBBIO CORTE BRUGNATELLA SAVIGNANO DI RIGO SAN LEO VERUCCHIO BADIA GIARDINI MARGHERITA MARECCHIA SAN PIETRO CAPOFIUME DE AMICIS DELTA CERVIA LUGAGNANO PARCO BERTOZZI SAN LAZZARO PARCO RESISTENZA FRANCHINI-ANGELONI VIA CHIARINI GHERARDI GAVELLO SAVIGNANO BESENZONE ROMA VILLA FULVIA PORTA SAN FELICE CASTELLARANO CAORLE PARCO MONTECUCCO ZALAMELLA CENTO PARCO EDILCARANI BOGOLESE SARAGAT PARADIGNA S. LAZZARO MALCANTONE REMESINA S. ROCCO CENO ISONZO PARCO FERRARI GIORDANI-FARNESE CITTADELLA FLAMINIA SAN FRANCESCO MONTEBELLO GERBIDO GIARDINI TIMAVO
count 10000.0 10000.0 10000.0 10000.0 10000.0 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.00000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.00000 10000.000000 10000.000000 10000.000000 10000.000000
mean 0.0 0.0 0.0 0.0 0.0 0.000400 0.001300 0.019800 0.030800 0.042900 0.051900 0.051900 0.052600 0.05670 0.072300 0.075800 0.104700 0.110000 0.110300 0.138200 0.146200 0.164400 0.253800 0.257000 0.262300 0.265700 0.289000 0.308800 0.318600 0.330200 0.339400 0.357600 0.428000 0.456100 0.464900 0.472600 0.495100 0.525300 0.618500 0.629300 0.651200 0.714200 0.715000 0.797300 0.81890 0.854000 0.906000 0.911000 0.961900
std 0.0 0.0 0.0 0.0 0.0 0.019997 0.036034 0.139319 0.172784 0.202642 0.221836 0.221836 0.223245 0.23128 0.258997 0.264691 0.306182 0.312905 0.313279 0.345127 0.353324 0.370657 0.435206 0.437001 0.439907 0.441727 0.453321 0.462022 0.465957 0.470308 0.473529 0.479317 0.494814 0.498094 0.498791 0.499274 0.500001 0.499384 0.485779 0.483016 0.476614 0.451817 0.451437 0.402031 0.38512 0.353124 0.291843 0.284758 0.191447
min 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000
25% 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.00000 1.000000 1.000000 1.000000 1.000000
50% 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.00000 1.000000 1.000000 1.000000 1.000000
75% 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.00000 1.000000 1.000000 1.000000 1.000000
max 0.0 0.0 0.0 0.0 0.0 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.00000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.00000 1.000000 1.000000 1.000000 1.000000
In [24]:
aux_results.loc['mean',:].plot(kind='barh', figsize=(15,15), xticks=np.linspace(0,1,11))
#res = plt.xticks(rotation = 30)
Out[24]:
<AxesSubplot: >
In [25]:
pd.set_option('display.max_columns', 500)
aux_results = pd.DataFrame(ritorno['inference_data'].posterior.is_over_annual_limit.values.reshape((aux_shape[0]*aux_shape[1],aux_shape[2])), columns=df.columns).describe()
aux_results.sort_values('mean', axis=1, ascending=True)
Out[25]:
CASTELLUCCIO SARAGAT CENTO PARCO MONTECUCCO S. ROCCO GHERARDI VILLA FULVIA PORTA SAN FELICE MALCANTONE BESENZONE DELTA CERVIA PARCO EDILCARANI CASTELLARANO S. LAZZARO LUGAGNANO SAN LAZZARO GAVELLO SAVIGNANO FEBBIO CORTE BRUGNATELLA SAVIGNANO DI RIGO SAN LEO VERUCCHIO BADIA SAN PIETRO CAPOFIUME REMESINA PARCO RESISTENZA MARECCHIA DE AMICIS VIA CHIARINI FRANCHINI-ANGELONI GIARDINI MARGHERITA PARCO BERTOZZI ROMA BOGOLESE PARADIGNA ZALAMELLA CAORLE ISONZO PARCO FERRARI CITTADELLA GIORDANI-FARNESE SAN FRANCESCO CENO FLAMINIA GIARDINI MONTEBELLO GERBIDO TIMAVO
count 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0 10000.0000 10000.0000 10000.0000 10000.0000 10000.0000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.00000 10000.000000 10000.000000
mean 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0001 0.0001 0.0001 0.0001 0.0001 0.000200 0.000200 0.000300 0.000300 0.000400 0.000400 0.000500 0.001200 0.001400 0.00180 0.012300 0.013500
std 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0100 0.0100 0.0100 0.0100 0.0100 0.014141 0.014141 0.017319 0.017319 0.019997 0.019997 0.022356 0.034622 0.037392 0.04239 0.110227 0.115408
min 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0000 0.0000 0.0000 0.0000 0.0000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000
25% 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0000 0.0000 0.0000 0.0000 0.0000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000
50% 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0000 0.0000 0.0000 0.0000 0.0000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000
75% 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0000 0.0000 0.0000 0.0000 0.0000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000
max 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0000 1.0000 1.0000 1.0000 1.0000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.00000 1.000000 1.000000
In [26]:
aux_results.loc['mean',:].plot(kind='barh', figsize=(15,15), xticks=np.linspace(0,1,11))
#res = plt.xticks(rotation = 30)
Out[26]:
<AxesSubplot: >
In [27]:
y_post_pred = ritorno['inference_data'].posterior.y_post_pred.to_numpy()
y_post_pred = y_post_pred.reshape(y_post_pred.shape[0]*y_post_pred.shape[1], y_post_pred.shape[2], y_post_pred.shape[3])
y_post_pred.shape
Out[27]:
(10000, 365, 49)
In [28]:
import ipywidgets as widgets
from ipywidgets import HBox, VBox
from IPython.display import display
In [29]:
@widgets.interact(stazione = df.columns)
def f(stazione):
    col_map = sns.light_palette((20, 75, 70), input='husl', as_cmap=True)

    plt.figure(figsize=(14,8))
    ax = plt.subplot(1,1,1)

    idx_stazione = df.columns.to_list().index(stazione)
    
    """
    sns.lineplot(np.transpose(y_post_pred[0:50,:,idx_stazione]))
    """

    lower_lim = np.zeros(len(df.index))
    upper_lim = np.zeros(len(df.index))    
    for i in range(len(df.index)):
        lower_lim[i] = np.percentile(y_post_pred[:,i,idx_stazione],2.5)
        upper_lim[i] = np.percentile(y_post_pred[:,i,idx_stazione],97.5)
    
    
#    sns.lineplot(lower_lim)
#    sns.lineplot(upper_lim)
    
    for i in range(len(df.index)):
        ax.add_patch(mplt.patches.Rectangle((i,lower_lim[i]),1,upper_lim[i]-lower_lim[i], fill=True, color=col_map(180)))

    sns.lineplot(df[stazione])    
    sns.lineplot(np.mean(y_post_pred[:,:,idx_stazione], axis=0))


    index = 0
    for line in ax.get_lines():
        if index == 0:
            col_map = sns.dark_palette((230,90,65), input='husl', as_cmap=True)
            line.set_c(col_map(155))
        else:
            col_map = sns.dark_palette((10,90,65), input='husl', as_cmap=True)
            line.set_c(col_map(175))
        index += 1
        
    plt.ylim(bottom=0,top=2.5)

    primo_giorno = datetime.date(2018,1,1)
    date_da_segnare = []
    date_da_segnare_posizioni = []

    for i in range(12):
        date_da_segnare.append(datetime.date(2018,i+1,1))
        date_da_segnare_posizioni.append((date_da_segnare[2*i] - primo_giorno).days)
        date_da_segnare.append(datetime.date(2018,i+1,15))
        date_da_segnare_posizioni.append((date_da_segnare[2*i+1] - primo_giorno).days)
        date_da_segnare[2*i] = date_da_segnare[2*i].isoformat()
        date_da_segnare[2*i+1] = date_da_segnare[2*i+1].isoformat()

    plt.xticks(date_da_segnare_posizioni,date_da_segnare,rotation=65)
    plt.tick_params(
        axis='x',          # changes apply to the x-axis
        which='both',      # both major and minor ticks are affected
        bottom=True,      # ticks along the bottom edge are off
        top=False,         # ticks along the top edge are off
        labelbottom=True)
    plt.grid()
    
    """
    ax.get_legend().remove()
    col_vals = np.linspace(1,255,num=len(df.columns))
    index = 0
    for line in ax.get_lines():
        if(index == len(ax.get_lines()) - 1):
            col_map = sns.dark_palette((230,90,65), input='husl', as_cmap=True)
            line.set_c(col_map(175))
            continue
        if(index == len(ax.get_lines()) - 2):
            col_map = sns.dark_palette((120,90,65), input='husl', as_cmap=True)
            line.set_c(col_map(175))
            index += 1
            continue

        #line.set_c(col_map(int(np.round(col_vals[index]))))
        line.set_c(col_map(220))
        line.set_alpha(0.3)
        index += 1
    """
    
    

    plt.show()
interactive(children=(Dropdown(description='stazione', options=('CASTELLUCCIO', 'FEBBIO', 'CORTE BRUGNATELLA',…
In [30]:
def f(stazione):
    col_map = sns.light_palette((20, 75, 70), input='husl', as_cmap=True)

    plt.figure(figsize=(14,8))
    ax = plt.subplot(1,1,1)

    idx_stazione = df.columns.to_list().index(stazione)
    
    """
    sns.lineplot(np.transpose(y_post_pred[0:50,:,idx_stazione]))
    """

    lower_lim = np.zeros(len(df.index))
    upper_lim = np.zeros(len(df.index))    
    for i in range(len(df.index)):
        lower_lim[i] = np.percentile(y_post_pred[:,i,idx_stazione],2.5)
        upper_lim[i] = np.percentile(y_post_pred[:,i,idx_stazione],97.5)
    
    
#    sns.lineplot(lower_lim)
#    sns.lineplot(upper_lim)
    
    for i in range(len(df.index)):
        ax.add_patch(mplt.patches.Rectangle((i,lower_lim[i]),1,upper_lim[i]-lower_lim[i], fill=True, color=col_map(180)))

    sns.lineplot(df[stazione])    
    sns.lineplot(np.mean(y_post_pred[:,:,idx_stazione], axis=0))


    index = 0
    for line in ax.get_lines():
        if index == 0:
            col_map = sns.dark_palette((230,90,65), input='husl', as_cmap=True)
            line.set_c(col_map(155))
        else:
            col_map = sns.dark_palette((10,90,65), input='husl', as_cmap=True)
            line.set_c(col_map(175))
        index += 1
        
    plt.ylim(bottom=0,top=2.5)

    primo_giorno = datetime.date(2018,1,1)
    date_da_segnare = []
    date_da_segnare_posizioni = []

    for i in range(12):
        date_da_segnare.append(datetime.date(2018,i+1,1))
        date_da_segnare_posizioni.append((date_da_segnare[2*i] - primo_giorno).days)
        date_da_segnare.append(datetime.date(2018,i+1,15))
        date_da_segnare_posizioni.append((date_da_segnare[2*i+1] - primo_giorno).days)
        date_da_segnare[2*i] = date_da_segnare[2*i].isoformat()
        date_da_segnare[2*i+1] = date_da_segnare[2*i+1].isoformat()

    plt.xticks(date_da_segnare_posizioni,date_da_segnare,rotation=65)
    plt.tick_params(
        axis='x',          # changes apply to the x-axis
        which='both',      # both major and minor ticks are affected
        bottom=True,      # ticks along the bottom edge are off
        top=False,         # ticks along the top edge are off
        labelbottom=True)
    plt.grid()
    
    """
    ax.get_legend().remove()
    col_vals = np.linspace(1,255,num=len(df.columns))
    index = 0
    for line in ax.get_lines():
        if(index == len(ax.get_lines()) - 1):
            col_map = sns.dark_palette((230,90,65), input='husl', as_cmap=True)
            line.set_c(col_map(175))
            continue
        if(index == len(ax.get_lines()) - 2):
            col_map = sns.dark_palette((120,90,65), input='husl', as_cmap=True)
            line.set_c(col_map(175))
            index += 1
            continue

        #line.set_c(col_map(int(np.round(col_vals[index]))))
        line.set_c(col_map(220))
        line.set_alpha(0.3)
        index += 1
    """
    
    

    plt.show()
In [31]:
for stazione in df.columns:
    f(stazione)
In [ ]:
 
In [ ]:
 
In [ ]: